Avgpooling
对NHWC格式的输入张量执行2D平均池化,并随后进行范围裁剪(Clip)激活。
该算子融合了两个步骤:
平均池化 (Average Pooling):
\[\text{Pool}_{i,j} = \frac{1}{k_h \times k_w} \sum_{m=0}^{k_h-1} \sum_{n=0}^{k_w-1} \text{Input}_{i \cdot s_h + m, j \cdot s_w + n}\]
裁剪激活 (Clipping Activation):
\[\text{Output} = \max(\text{min\_val}, \min(\text{Pool}, \text{max\_val}))\]
- 输入:
input - 输入张量的数据地址。格式: NHWC。
params - 其他参数打包成数组。
core_mask - 核掩码。
- 输出:
output - 输出张量的数据地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持fp32, int8
MT7004 支持fp16, fp32
参数数组结构:
long long params[16];
params[0] = (long long)in_w;          // 输入特征图的宽度。
params[1] = (long long)in_h;          // 输入特征图的高度。
params[2] = (long long)win_w;         // 池化核的宽度。
params[3] = (long long)win_h;         // 池化核的高度。
params[4] = (long long)output_w;      // 输出特征图的宽度。
params[5] = (long long)output_h;      // 输出特征图的高度。
params[6] = (long long)output_batch;  // 输出特征图的批大小。
params[7] = (long long)channel;       // 输出特征图的通道数。
params[8] = (long long)stride_w;      // 水平方向的步长。
params[9] = (long long)stride_h;      // 垂直方向的步长。
params[10] = (long long)pad_l;        // 左边距填充。
params[11] = (long long)pad_u;        // 上边距填充。
params[12] = (long long)&minf;        // 裁剪范围的最小值地址。
params[13] = (long long)&maxf;        // 裁剪范围的最大值地址。
共享存储版本:
-
void i8_avgpool_fusion_s(int8_t *input, int8_t *output, long long *params, int core_mask)
-
void fp_avgpool_fusion_s(float *input, float *output, long long *params, int core_mask)
-
void hp_avgpool_fusion_s(half *input, half *output, long long *params, int core_mask)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <avgpooling.h>
4int main(int argc, char* argv[]) {
5 float *input_ptr = (float *)0x81000000;
6 float *output_ptr = (float *)0x82000000;
7
8 int output_batch = 16; //batch数
9 int channel = 4;
10 int in_w = 16;
11 int in_h = 16;
12
13 int win_w = 2;
14 int win_h = 2;
15 int stride_w = 2;
16 int stride_h = 2;
17 int pad_l = 0;
18 int pad_u = 0;
19 float minf = 0;
20 float maxf = 50;
21
22 //计算output_w和output_h
23 int dividor = in_w + pad_l + 0 - win_w;
24 int output_w = (dividor + stride_w - 1) / stride_w + 1;
25 int dividor2 = in_h + pad_u + 0 - win_h;
26 int output_h = (dividor2 + stride_h - 1) / stride_h + 1;
27
28 long long params[16];
29 params[0] = (long long)in_w;
30 params[1] = (long long)in_h;
31 params[2] = (long long)win_w;
32 params[3] = (long long)win_h;
33 params[4] = (long long)output_w;
34 params[5] = (long long)output_h;
35 params[6] = (long long)output_batch;
36 params[7] = (long long)channel;
37 params[8] = (long long)stride_w;
38 params[9] = (long long)stride_h;
39 params[10] = (long long)pad_l;
40 params[11] = (long long)pad_u;
41 params[12] = (long long)&minf; //注意这里传指针,不能直接强制转换成long long
42 params[13] = (long long)&maxf;
43
44 srand(time(NULL));
45
46 //初始化output_ptr
47 int input_size = output_batch * channel * in_w * in_h;
48 int i;
49 for (i = 0; i < input_size; i++) {
50 input_ptr[i] = (float)(rand() % 100);
51 }
52 int core_mask = 0b1111;
53 fp_avg_pooling_s(input_ptr, output_ptr, params, core_mask);
54 return 0;
55}
私有存储版本:
-
void i8_avgpool_fusion_p(int8_t *input, int8_t *output, long long *params)
-
void fp_avgpool_fusion_p(float *input, float *output, long long *params)
-
void hp_avgpool_fusion_p(half *input, half *output, long long *params)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <avgpooling.h>
4int main(int argc, char* argv[]) {
5 float *input_ptr = (float *)0x10010000;
6 float *output_ptr = (float *)0x10020000;
7
8 int output_batch = 16; //batch数
9 int channel = 4;
10 int in_w = 16;
11 int in_h = 16;
12
13 int win_w = 2;
14 int win_h = 2;
15 int stride_w = 2;
16 int stride_h = 2;
17 int pad_l = 0;
18 int pad_u = 0;
19 float minf = 0;
20 float maxf = 50;
21
22 //计算output_w和output_h
23 int dividor = in_w + pad_l + 0 - win_w;
24 int output_w = (dividor + stride_w - 1) / stride_w + 1;
25 int dividor2 = in_h + pad_u + 0 - win_h;
26 int output_h = (dividor2 + stride_h - 1) / stride_h + 1;
27
28 long long params[16];
29 params[0] = (long long)in_w;
30 params[1] = (long long)in_h;
31 params[2] = (long long)win_w;
32 params[3] = (long long)win_h;
33 params[4] = (long long)output_w;
34 params[5] = (long long)output_h;
35 params[6] = (long long)output_batch;
36 params[7] = (long long)channel;
37 params[8] = (long long)stride_w;
38 params[9] = (long long)stride_h;
39 params[10] = (long long)pad_l;
40 params[11] = (long long)pad_u;
41 params[12] = (long long)&minf; //注意这里传指针,不能直接强制转换成long long
42 params[13] = (long long)&maxf;
43
44 srand(time(NULL));
45
46 //初始化output_ptr
47 int input_size = output_batch * channel * in_w * in_h;
48 int i;
49 for (i = 0; i < input_size; i++) {
50 input_ptr[i] = (float)(rand() % 100);
51 }
52 int core_mask = 0b1111;
53 fp_avg_pooling_p(input_ptr, output_ptr, params);
54 return 0;
55}